/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection

Created on: 12/28/2022
Last Modified on: 2/17/2024

Description: This program generates investigator-level placement and outcome
rates, as well as standard errors for these estimates. In contrast to our baseline
specification, this program adjusts these rates for a set of covariates.

The program takes as an input (i) a child by investigation level dataset spanning 
January 2008 to December 2019, subject to the sample restrictions discussed in the paper,
and (ii) a dataset containing the number of children in the focal investigation (a 
key covariate used in the covariate adjustment).
 
Using these child by investigation level datasets, the program then estimates covariate 
adjusted investigator-specific rates and standard errors using a linear adjustment to 
account for randomization strata, which we discuss in the paper.  

Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

**************************
**(0) SETUP
**************************
clear
set more off
macro drop all
capture log close
set seed 02042023

*Set directories 
global cleandata 
global tmpdata 

**************************
**(1) ESTIMATE COV ADJUSTED AT-HOME MALTREATMENT AND PLACEMENT RATES/SE
**************************
use "${cleandata}analysis_sample_investigators_qje.dta", clear 

*Gen additional variables needed for covariate adjustment
gen parent = mom==1 | dad==1 | parent_
gen rel2 = notrel==0 & parent==0 
gen neglect = phyneg==1 | medneg==1 | failprot==1 | impsup ==1 

*Merge to information on family size 
cap drop _merge 
merge 1:1 vicid inv_caseid using "${tmpdata}num_children_2008_2019", keepus(num_children) keep(1 3)

foreach x in white female age_inv phyab neglect maltreat parent rel notrel num_children {
	gen miss_`x' = `x'==.
	replace `x'=0 if `x'==.
}

keep worker_id pre_black fc inv6m count_inv bshare rotationgroup vicid female age_inv ///
phyab neglect maltreat parent rel notrel num_children 

foreach x in female age_inv phyab neglect maltreat parent rel notrel num_children {
	summarize `x'
	gen `x'_control = `x' - r(mean)
}

global covs female_control age_inv_control  phyab_control  neglect_control  maltreat_control  parent_control  rel_control  notrel_control  num_children_control 
rename pre_black black 
gen white = black==0

global fixedeffect rotationgroup 

//get levels of invid
egen grpworker_id = group(worker_id)
levelsof grpworker_id
local levels = "`r(levels)'"

preserve 
foreach var in fc inv6m {
	qui {
	if "`var'"=="fc"{
		gen x = `var' if black == 1 | white == 1
		reghdfe x i.grpworker_id i.grpworker_id#i.black $covs, absorb($fixedeffect) resid
	}
	
	* For other outcomes, condition on left at home
	if "`var'"!="fc"{
		gen x = `var' if black == 1 | white == 1
		reghdfe x i.grpworker_id i.grpworker_id#i.black $covs if fc==0, resid absorb($fixedeffect)
	}
	}
	
	* retrieve average of fixed effect
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb 
	sum d 
	local exp = r(mean)
	
	gen b_`var' = .
	gen b_se_`var' = .
	gen w_`var' = .
	gen w_se_`var' = .
   
   	* Quietly
	qui {
	* White children
	foreach i in `levels'{
		capture: lincom _cons + _b[`i'.grpworker_id] + `exp'
		if _rc == 0 {
            replace w_`var' =  r(estimate) if grpworker_id == `i'
			replace w_se_`var'= r(se) if  grpworker_id==`i'
		}
	}
	* Black children
	foreach i in `levels'{
		capture: lincom _cons + _b[`i'.grpworker_id] + _b[`i'.grpworker_id#1.black] + `exp'
		if _rc == 0 {
            replace b_`var' =  r(estimate) if grpworker_id == `i'
			replace b_se_`var'= r(se) if  grpworker_id==`i'
		}
	}
	}
	drop x xbd xb d
}

* Save investigator-level dataset 
cap drop count_inv 
bys worker_id: gen count_inv = _N
bys worker_id: egen count_black = total(black)
bys worker_id : egen count_white = total(white)

* Keep relevant vars
keep b_* w_* worker_id count_inv count_black count_white grpworker_id
duplicates drop worker_id, force 	
drop if worker_id == .
save "${cleandata}inv_rates_covariates_qje.dta", replace
restore 


*Clustered standard errors:
foreach var in fc inv6m {
	qui {
	if "`var'"=="fc"{
		gen x = `var' if black == 1 | white == 1
		reghdfe x i.grpworker_id i.grpworker_id#i.black $covs, resid absorb($fixedeffect) cluster(worker_id vicid)
	}
	
	* For other outcomes, condition on left at home
	if "`var'"!="fc"{
		gen x = `var' if black == 1 | white == 1
		reghdfe x i.grpworker_id i.grpworker_id#i.black $covs if fc==0, resid absorb($fixedeffect) cluster(worker_id vicid)
	}
	}
	
	* retrieve average of fixed effect
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb 
	sum d 
	local exp = r(mean)
	
	gen b_twse_`var' = .
	gen w_twse_`var' = .
   
   	* Quietly
	qui {
	* White children
	foreach i in `levels'{
		capture: lincom _cons + _b[`i'.grpworker_id] + `exp'
		if _rc == 0 {
			replace w_twse_`var'= r(se) if  grpworker_id==`i'
		}
	}
	* Black children
	foreach i in `levels'{
		capture: lincom _cons + _b[`i'.grpworker_id] + _b[`i'.grpworker_id#1.black] + `exp'
		if _rc == 0 {
			replace b_twse_`var'= r(se) if  grpworker_id==`i'
		}
	}
	}
	drop x xbd xb d
}

cap drop count_inv 
bys worker_id: gen count_inv = _N
bys worker_id: egen count_black = total(black)
bys worker_id : egen count_white = total(white)

* Keep relevant vars
keep b_* w_* worker_id count_inv count_black count_white grpworker_id
duplicates drop worker_id, force 	
drop if worker_id == .

*Merge clustered standard errors with rates and save dataset
merge 1:1 worker_id using "${cleandata}inv_rates_covariates_qje.dta", keep(1 3) keepus(b_fc w_fc b_inv6m w_inv6m b_se_fc w_se_fc b_se_inv6m w_se_inv6m)

	foreach x in w_fc b_fc {
		replace `x' = 1 - `x'
	}	
	gen sh_black = count_black/count_inv 
	sum sh_black [aw=count_inv] 
	gen bshare = r(mean)
	local bshare = r(mean)
		
	rename (w_fc b_fc w_inv6m b_inv6m) (D_w D_b Y_w Y_b)
	foreach x in D_w D_b Y_w Y_b {
		replace `x' = 0 if `x'<0 
		replace `x' = 1 if `x'>1 
	}
	
	gen D_w2 = D_w*D_w 
	gen D_b2 = D_b*D_b

save "${cleandata}inv_rates_se_covariates_qje.dta", replace